********************************************************
* Extract Bond Data from WRDS
*
* Uses info from:
* Dick-Nielsen, Jens (2009). Liquidity biases in TRACE.  
*	Journal of Fixed Income, 19(2):43-55.
********************************************************;

libname trace '/wrds/traceaxs/trace/sasdata/';
libname bond '/home/nwu/nwu1/crotty/ra2011sept/';


********************************************************
* TRACE data
*	1.  trace_names = 9-digit cusip, TRACE bond symbol, ticker, start and end dates
*	2.  trace = transactions database
********************************************************;

* Copy the TRACE names table into the bond library;
data bond.trace_names;
    set trace.trace_names;
run;


* Keep only the transactions executed on or after 1 January 2007;
data bond.trace2007;
    set trace.trace;
    where trd_exctn_dt >= '01JAN2007'd;
run;



*This step takes a long time if done remotely - run in WRDS server;

* Input - name of the raw TRACE dataset handed to the cleaning program;
%let navnIN = trace2007;

* Output - name under which the cleaned TRACE dataset is stored;
%let navnOUT = traceCLEAN;





* Agency matching window;
* Reports for the same bond, volume, price and date whose execution
* times are closer together than this limit are treated as parts of
* one agency transaction;
%let time_limit = 60;

**********************************************************************************
* Start of the program
***********************************************************************************;
* Scratch dataset names - these datasets may be deleted after the run;
%let temp1 = trash1;
%let temp2 = trash2;
%let temp3 = trash3;

* Variables carried from the raw TRACE file into the cleaned file;
* NOTE(review): this list used to name sale_cndtn_cd twice. The repeat has
* been removed, which keeps exactly the same set of variables. Possibly the
* second entry was meant to be sale_cndtn2_cd - confirm against the TRACE
* file layout before adding it;
%let varlist = bond_sym_id trd_exctn_dt msg_seq_nb asof_cd trd_exctn_tm rptd_pr trc_st orig_msg_seq_nb company_symbol cmsn_trd wis_fl cusip_id spcl_trd_fl sale_cndtn_cd yld_pt high_yld_pt low_yld_pt lsal_yld_pt;


* Reads the TRACE data and changes the volume to a numeric variable;
data bond.&navnOUT;
    set bond.&navnIN (keep = &varlist ascii_rptd_vol_tx);

    * The text codes 5MM+ and 1MM+ are replaced by 5000000 and 1000000,
    * every other volume text is passed through unchanged;
    * vol2 takes its length of 7 characters from the first assignment and
    * the 7. informat below reads at most 7 characters;
    if ascii_rptd_vol_tx = '5MM+' then vol2 = '5000000';
    else if ascii_rptd_vol_tx = '1MM+' then vol2 = '1000000';
    else vol2 = ascii_rptd_vol_tx;

    vol = input(vol2, 7.);

    * A counter that keeps track of the original report order;
    * Later steps merge on it to address individual reports;
    place = _N_;

    * Delayed disseminations are treated after the same guidelines as
    * non-delayed, so code X is folded into R and code D into A;
    if asof_cd = 'X' then asof_cd = 'R';
    if asof_cd = 'D' then asof_cd = 'A';

run;


******************
* Step1
******************;
*Find number of firms in original dataset;
* bond_no holds one row per firm-bond pair and firm_no one row per firm,
* mirroring the bond_noN / firm_noN pairs written after the later steps;
* Previously the firm-bond list was written to firm_no and then overwritten;
proc sort data=bond.&navnOUT (keep= bond_sym_id company_symbol)
          out=bond.bond_no nodupkey;
    by company_symbol bond_sym_id;
run;
proc sort data=bond.bond_no (keep= company_symbol)
          out=bond.firm_no nodupkey;
    by company_symbol;
run;

* Keep a single report per bond, execution date and message sequence
* number. Only the varlist columns plus vol and place are retained;
proc sort nodupkey
    data = bond.&navnOUT (keep = &varlist vol place)
    out  = bond.&navnOUT;
  by bond_sym_id trd_exctn_dt msg_seq_nb;
run;

* Firm and bond counts after step 1;
* bond_no1 has one row per firm-bond pair, firm_no1 one row per firm;
proc sort nodupkey
    data = bond.&navnOUT (keep = company_symbol bond_sym_id)
    out  = bond.bond_no1;
  by company_symbol bond_sym_id;
run;
proc sort nodupkey
    data = bond.bond_no1 (keep = company_symbol)
    out  = bond.firm_no1;
  by company_symbol;
run;


******************
* Step2
******************;
* Pull every reversal (asof_cd equal to R) into its own dataset;
data bond.&temp1;
    set bond.&navnOUT (where = (asof_cd = 'R'));
run;

* Put the reversals in matching-key order: bond, execution date,
* execution time, price and volume;
proc sort data = bond.&temp1;
  by bond_sym_id trd_exctn_dt trd_exctn_tm rptd_pr vol;
run;

* Put the full dataset in the same key order so the two can be merged;
proc sort data = bond.&navnOUT;
  by bond_sym_id trd_exctn_dt trd_exctn_tm rptd_pr vol;
run;

* Merges the original dataset with the reversals;
* Keeps the reports that share bond, date, time, price and volume with a
* reversal and that are neither coded A nor R themselves. These are the
* candidates to be cancelled by a reversal;
data bond.&temp2;
    merge bond.&temp1 (in=rev) bond.&navnOUT;
    by bond_SYM_ID trd_exctn_dt trd_exctn_tm rptd_pr vol;
    * AND is spelled out here. The former double-ampersand spelling only
    * worked because the macro processor collapses two ampersands into one;
    if rev=1 and asof_cd NE 'A' and asof_cd NE 'R';
run;
* Records in the temp2 dataset are those that need to be reversed/deleted;


* Keeps a single matching report per key, so that at most one report is
* reversed for each bond, date, time, price and volume combination;
proc sort data=bond.&temp2 nodupkey;
    by bond_SYM_ID trd_exctn_dt trd_exctn_tm rptd_pr vol;
run;

* Return the main data to original report order;
proc sort data = bond.&navnOUT;
  by place;
run;

* Give the matched reports the same ordering so they can be merged on place;
proc sort data = bond.&temp2;
  by place;
run;

* Drop each matched report (flagged through the IN variable) together
* with every reversal record;
data bond.&navnOUT;
    merge bond.&temp2 (in = rev)
          bond.&navnOUT;
    by place;

    if rev or asof_cd = 'R' then delete;
run;

* Firm and bond counts after step 2;
* bond_no2 has one row per firm-bond pair, firm_no2 one row per firm;
proc sort nodupkey
    data = bond.&navnOUT (keep = company_symbol bond_sym_id)
    out  = bond.bond_no2;
  by company_symbol bond_sym_id;
run;
proc sort nodupkey
    data = bond.bond_no2 (keep = company_symbol)
    out  = bond.firm_no2;
  by company_symbol;
run;



********************
* Step3
********************;

* Order by message sequence number within bond and execution date so
* that same-day corrections can be lined up with the reports they refer to;
proc sort nodupkey data = bond.&navnOUT;
  by bond_sym_id trd_exctn_dt msg_seq_nb;
run;

* Set aside every report whose trc_st is not T, that is the same-day
* corrections and cancellations;
* Each of them has its message sequence number overwritten with its
* original message sequence number, so that it carries the key of the
* report it refers to when merged back below;
data bond.&temp3;
    set bond.&navnOUT;
    if trc_st = 'T' then delete;
    msg_seq_nb = orig_msg_seq_nb;
run;

* Restore key order after the sequence numbers were replaced;
proc sort data = bond.&temp3;
  by bond_sym_id trd_exctn_dt msg_seq_nb;
run;

* Combine the same-day corrections and cancellations with the step 2 data;
* A row is dropped when
*   - its key is present in the corrections dataset, which removes the
*     original report that was corrected or cancelled, or
*   - its trc_st is C, which removes the cancellation record itself;
* Correction records stay in the data, since they only match under the
* replaced key and not under their own message sequence number;
data bond.&navnOUT;
    merge bond.&temp3 (in = same)
          bond.&navnOUT;
    by bond_sym_id trd_exctn_dt msg_seq_nb;

    if same or trc_st = 'C' then delete;

run;

* Firm and bond counts after step 3;
* bond_no3 has one row per firm-bond pair, firm_no3 one row per firm;
proc sort nodupkey
    data = bond.&navnOUT (keep = company_symbol bond_sym_id)
    out  = bond.bond_no3;
  by company_symbol bond_sym_id;
run;
proc sort nodupkey
    data = bond.bond_no3 (keep = company_symbol)
    out  = bond.firm_no3;
  by company_symbol;
run;


********************
* Agency step
********************;
* Arrange the reports for the next data step: reports with equal bond,
* date, price and volume become neighbours, in execution time order;
* NODUP compares all variables in the dataset, whereas NODUPKEY compares
* only the variables in the BY list;
proc sort nodup data = bond.&navnOUT;
  by bond_sym_id trd_exctn_dt rptd_pr vol trd_exctn_tm;
run;

* Removes all but one record of an agency transaction;
* A report is dropped when it is not the first of its bond, date, price
* and volume group and its execution time is less than the time limit
* after the execution time of the report read just before it;
data bond.&navnOUT (drop = time_diff);
    set bond.&navnOUT;
    by bond_SYM_ID trd_exctn_dt rptd_pr vol;
    * LAG has to execute on every row to track the previous report, so the
    * gap is computed unconditionally before the test;
    time_diff = trd_exctn_tm - lag(trd_exctn_tm);
    * first.vol is tested directly. The former first1 copy and the unused
    * last1 flag were only created to be dropped again;
    if first.vol = 0 and time_diff < &time_limit then delete;
run;

* Firm and bond counts after the agency step;
* bond_no4 has one row per firm-bond pair, firm_no4 one row per firm;
proc sort nodupkey
    data = bond.&navnOUT (keep = company_symbol bond_sym_id)
    out  = bond.bond_no4;
  by company_symbol bond_sym_id;
run;
proc sort nodupkey
    data = bond.bond_no4 (keep = company_symbol)
    out  = bond.firm_no4;
  by company_symbol;
run;




***************************
* Only keep 1 obs per day
*
***************************;
* The cleaned dataset is addressed through the navnOUT macro variable, as
* in the cleaning steps, instead of a hard-coded name. Changing the output
* name then needs a single edit;
proc sort data = bond.&navnOUT;
	by bond_sym_id company_symbol trd_exctn_dt trd_exctn_tm;
run;

* Among the reports with a blank asof_cd, keep the last one in execution
* time order for each bond, firm and execution date;
data bond.yields;
	set bond.&navnOUT (keep = bond_sym_id company_symbol cusip_id trd_exctn_dt
						 asof_cd rptd_pr yld_pt trd_exctn_tm where=(asof_cd=""));
	by bond_sym_id company_symbol trd_exctn_dt;
	if last.trd_exctn_dt = 1;
	* asof_cd was only needed for the filter above;
	drop asof_cd;
run;

* Print the structure of the resulting dataset;
proc contents data = bond.yields; run;


* Write the daily yields to a Stata file, replacing any existing file;
proc export
        data    = bond.yields
        outfile = '/home/nwu/nwu1/crotty/ra2011sept/yields_full'
        dbms    = dta
        replace;
run;
    

    
